package com.zte.cl.service.task;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;

import com.zte.cl.model.News;
import com.zte.cl.parser.ContentPagePipeline;
import com.zte.cl.parser.ContentPageProcessor;
import com.zte.cl.utils.ConstantUtil;
import com.zte.cl.utils.UrlUtils;

/**
 * Runnable task that crawls the single page referenced by a {@link News}
 * item.
 *
 * <p>Each invocation of {@link #run()} builds a fresh {@link Spider} around a
 * {@link ContentPageProcessor}, seeds it with the news item's page URL,
 * attaches a {@link ContentPagePipeline}, runs the crawl on the calling
 * thread, and closes the spider afterwards.
 */
public class ContenPageTask implements Runnable {

	private static final Log log = LogFactory.getLog(ContenPageTask.class);

	/** The news item whose page URL is crawled; assigned once in the constructor. */
	private final News news;

	/**
	 * Creates a task for the given news item.
	 *
	 * @param news the news item supplying the page URL; it is dereferenced in
	 *             {@link #run()}, so a {@code null} value fails there rather
	 *             than here
	 */
	public ContenPageTask(News news) {
		this.news = news;
	}

	/**
	 * Builds the crawler site configuration from the shared constants
	 * {@link ConstantUtil#SITE_SLEEP_TIME} and
	 * {@link ConstantUtil#SITE_USER_AGENT}.
	 *
	 * @param domain the domain of the page being crawled; currently NOT used
	 *               by this method. NOTE(review): possibly intended to be
	 *               applied to the returned site - confirm before changing.
	 * @return a new {@link Site} carrying the sleep time and user agent
	 */
	private Site getSite(String domain) {
		Site site = Site.me();
		site.setSleepTime(ConstantUtil.SITE_SLEEP_TIME);
		site.setUserAgent(ConstantUtil.SITE_USER_AGENT);
		return site;
	}

	/**
	 * Crawls the news item's page URL.
	 *
	 * <p>The spider is closed in a {@code finally} block so that it is
	 * released even when the crawl throws. A runtime failure is logged with
	 * the page URL and then rethrown unchanged, so callers observe the same
	 * exception as before.
	 */
	@Override
	public void run() {
		String pageUrl = news.getPageUrl();
		String domain = UrlUtils.getDomain(pageUrl);

		Spider spider = Spider.create(new ContentPageProcessor(getSite(domain), news));
		spider.addUrl(pageUrl);
		spider.addPipeline(new ContentPagePipeline());

		try {
			spider.run();
		} catch (RuntimeException e) {
			// Surface the failure in the log: when this task is submitted to
			// an executor, the exception may otherwise go unnoticed.
			log.error("Content page crawl failed for url: " + pageUrl, e);
			throw e;
		} finally {
			// Always release the spider, even if the crawl aborted.
			spider.close();
		}
	}
}
